/* This program calculates the tax liabilities assuming that all couples are unmarried, regardless of whether they are or not */

// Session setup: close any log that is still open (errors are suppressed by cap)
cap log close
// NOTE(review): set more 1 is legacy syntax, presumably turning off the --more-- pause; confirm
set more 1
// Empty the data in memory before loading the input file
clear



// March 2023, Alm, Leguizamon^2 (Marriage Tax and Race) //
// Make sure to set up the correct directory path and appropriate folders //
// This file is an adaptation of Ziliak's TAXSIM code (see Jim Ziliak's website) //

#d cr 

/* Load the household-level extract */
// Prepared data containing households that are single, married or cohabiting
use "dta\rawdata_ipumscps\maindata.dta", clear

/* An ASEC extract is now in memory. TAXSIM does not handle large inputs well,
   so the calculation below is run one year at a time and the yearly results
   are appended afterwards. startyear and endyear can be modified by the user.

   Convert the survey year to the tax year (the year before the survey). */
replace year=year-1



// period    : name of the variable holding the tax year
// startyear : first tax year processed by the loop
// endyear   : last tax year processed by the loop
local period year
local startyear 1991
local endyear 2018

forvalues j = `startyear'/`endyear' {

// Work on one tax year at a time; the full data set is put back by the restore at the end of the pass
preserve
keep if `period' == `j'

// Variable labels for the year and the family identifier
label variable year "[TS2] Tax year"
label variable uniqfm "Unique family identifier"

// Income components prepared for TAXSIM (they are aggregated further below).
// Values at or above 999999998 are set to zero; in Stata this comparison is
// also true for missing values, so missing incomes become zero as well.
// NOTE(review): presumably 999999998/999999999 are the missing/NIU codes of the extract - confirm per variable.
local earnings "inctot incwage incbus incfarm incss incwelfr incretir incssi incint incunemp incwkcom incvet incsurv incdisab incdivid incrent inceduc incchild incalim incasist incother incrann incpens oincbus oincfarm oincwage incdisa1 incdisa2 inclongj"
foreach incvar of local earnings {
    replace `incvar' = 0 if `incvar' >= 999999998
}
 
 
/* Identify one head (hd) and one spouse/partner (sp) per family unit; the totals per uniqfm should both be 1 */
// dupinc counts the other records with the same couple id and the same total income
// (0 = income is unique within the couple; missing for records with couples!=1)
duplicates tag uniqcplid inctot if couples==1, gen(dupinc)

// Highest total income within each couple
bysort uniqcplid: egen max_earncpl=max(inctot) if couples==1
// Married couples with different incomes: the higher earner is the head, the other the spouse
gen hd=1 if max_earncpl==inctot & couples==1 & dupinc==0 & marst==1
gen sp=1 if max_earncpl!=inctot & couples==1 & dupinc==0 & marst==1

// Married couples with equal incomes: fall back on the existing head / spouse variables
replace hd=head if couples==1 & dupinc==1 & marst==1
replace sp=spouse if couples==1 & dupinc==1 & marst==1 
// Cohabiting couples: the person with relate==101 is the head, the other member is the partner
replace hd=1 if couples==1 & cohab==1 & relate==101
replace sp=1 if couples==1 & cohab==1 & relate!=101

// Number of heads and spouses found per family (tabulated as a check)
bysort uniqfm: egen tothd=total(hd)
tab tothd
bysort uniqfm: egen totsp=total(sp)
tab totsp

// Keep only families with exactly one head and one spouse
keep if tothd==1 & totsp==1  // There was a miscoding issue in the survey  //

// Turn the flags into 0/1 indicators
replace hd=0 if hd==.
replace sp=0 if sp==.

// Identify married from unmarried couples: married is 1/0 for heads and spouses, missing for everyone else
gen married=1 if marst==1 & (hd==1 | sp==1)
replace married=0 if married==. & (hd==1 | sp==1)

// Family-level flag: 1 when the couple in the family is married, 0 otherwise
bysort uniqfm: egen marr_fm=max(married) // People belonging to married or unmarried couples  

// Dependents are all family members who are neither head nor spouse
// (this includes non-children such as disabled adults and grandchildren).
gen dep = (hd==0 & sp==0)

// Parent-pointer patterns shared by the child and other-dependent definitions
local both_parents "(momloc==mother & poploc==father)"
local single_parent "((momloc==mother & poploc==0) | (poploc==father & momloc==0) | (momloc==father & poploc==0) | (poploc==mother & momloc==0))"

// Children: dependents under 24 whose parent pointers match the recorded
// mother/father (both, or one with the other pointer equal to 0), plus any
// dependent with relate==1242 (foster children) regardless of age.
gen child = (dep==1 & ((age<24 & (`both_parents' | `single_parent')) | relate==1242))

// Other dependents: older than 23, living in the family unit and flagged as
// disabled, with the same parent-pointer patterns as above. They are assumed
// to be dependents; dropping them is a possible robustness check.
gen othrdep = (dep==1 & disability==1 & age>23 & (`both_parents' | `single_parent'))

// Grandchildren: remaining dependents with a parent found in the family
// (fndparent==1) and exactly one non-zero parent pointer.
gen grandchildren = (child==0 & dep==1 & othrdep==0 & fndparent==1 & ((momloc==0 & poploc!=0) | (momloc!=0 & poploc==0)))

// Remove the helper variables that were used earlier to locate parents within the whole family
drop parent-fthr_correct

// Define children first according to their relationship to the head and spouse //
// parent1/parent2 hold the person number of the head and the spouse; par1_numbr/par2_numbr spread them over the family
gen parent1=pernum if hd==1
gen parent2=pernum if sp==1
bysort uniqfm: egen par1_numbr=max(parent1)
bysort uniqfm: egen par2_numbr=max(parent2)
gen ch_ppr=1 if child==1 & ((momloc==par1_numbr & poploc==par2_numbr) | (momloc==par2_numbr & poploc==par1_numbr)) // child living with both parents (head and spouse) 
replace ch_ppr=1 if  child==1 & relate==1242 & ch_ppr==. // include foster children
gen ch_1prt=1 if child==1 & ((momloc==par1_numbr & poploc!=par2_numbr) | (momloc==par2_numbr & poploc!=par1_numbr) | (momloc!=par1_numbr & poploc==par2_numbr) | (momloc!=par2_numbr & poploc==par1_numbr))  // child with exactly one of head/spouse as a parent 



// When the family has no dependents, each head/spouse is its own claimer (own person number) // 
bysort uniqfm: egen totdeps=sum(dep)
gen claim_nodep=pernum if totdeps==0 & (hd==1 | sp==1)


// Assign claimant to the children with 2 parents at home
bysort uniqfm: egen child_2prnt=max(ch_ppr) // family-level flag: the family contains a child with both parents present
bysort uniqfm: gen claimer_2prnt=pernum if hd==1 & child_2prnt==1 // person number of the head, filled on the head record only, when such a child exists
bysort uniqfm: egen claim_2prnt_tmp=max(claimer_2prnt) // spreads the head person number over the whole family
gen claim_2prnt=claim_2prnt_tmp if claim_2prnt_tmp!=. & ch_ppr==1 // children with both parents present are claimed by the head
//replace claim_2prnt=pernum if claim_2prtn_tmp!=. & (hd==1 | sp==1) 


// Assign claimant to children with only 1 parent at home (done separately in case there are step children in families that also have children with both parents present) 
// NOTE(review): momloc+poploc equals the parent's person number only when the other pointer is 0 - confirm this always holds for ch_1prt==1
gen prnt_fnd=momloc+poploc if ch_1prt==1 
// NOTE(review): the family maximum is used, so all one-parent children in a family receive the same claimant - confirm this is intended
bysort uniqfm: egen claim_1prnt_tmp=max(prnt_fnd)

gen claim_1prnt=claim_1prnt_tmp if claim_1prnt_tmp!=. & ch_1prt==1
//replace claim_1prnt=pernum if claim_1prnt_tmp!=. & (hd==1 & sp==1)



// Assign claimant to other dependents (older, and not grandchildren), using the same steps as for children 
gen oth_2prnt=1 if othrdep==1  & ((momloc==par1_numbr & poploc==par2_numbr) | (momloc==par2_numbr & poploc==par1_numbr)) // both parents present
bysort uniqfm: egen oth_2prnt_fnd=max(oth_2prnt)
bysort uniqfm: gen claimer_oth_2prnt=pernum if hd==1 & oth_2prnt_fnd==1 
bysort uniqfm: egen claim_oth_2prnt_tmp=max(claimer_oth_2prnt)
gen claim_oth_2prnt=claim_oth_2prnt_tmp if claim_oth_2prnt_tmp!=. & oth_2prnt==1    
//replace claim_oth_2prnt=pernum if claim_oth_2prnt_tmp!=. & (hd==1 |sp==1) 

gen oth_1prt=1 if othrdep==1 & ((momloc==par1_numbr & poploc!=par2_numbr) | (momloc==par2_numbr & poploc!=par1_numbr) | (momloc!=par1_numbr & poploc==par2_numbr) | (momloc!=par2_numbr & poploc==par1_numbr)) // one parent present
gen oth_1prt_fnd=momloc+poploc if oth_1prt==1
bysort uniqfm: egen claim_oth_1prnt_tmp=max(oth_1prt_fnd)
gen claim_oth_1prnt=claim_oth_1prnt_tmp if claim_oth_1prnt_tmp!=. & oth_1prt==1
//replace claim_oth_1prnt=pernum if claim_oth_1prnt_tmp!=. & (hd==1 & sp==1)



// Grandchildren are claimed by whoever claims their own parent.
// parent_grnd is the person number of the grandchild's parent (the non-zero one of momloc/poploc).
gen parent_grnd=momloc+poploc if grandchildren==1 & ((poploc==0 & momloc!=0) | (poploc!=0 & momloc==0)) 
// NOTE(review): the family mean is used, which equals a person number only when all grandchildren in the family share one parent - confirm
bysort uniqfm: egen parent_grnd_s=mean(parent_grnd)
// On the record of that parent, pick up the claimant assigned to him/her (exactly one of the four claimant variables is set)
gen findparent_claim_gr=claim_2prnt if pernum==parent_grnd_s & claim_2prnt!=. & claim_1prnt==. & claim_oth_2prnt==. & claim_oth_1prnt==. 
replace findparent_claim_gr=claim_1prnt if pernum==parent_grnd_s & claim_2prnt==. & claim_1prnt!=. & claim_oth_2prnt==. & claim_oth_1prnt==.
replace findparent_claim_gr=claim_oth_2prnt if pernum==parent_grnd_s & claim_2prnt==. & claim_1prnt==. & claim_oth_2prnt!=. & claim_oth_1prnt==.
replace findparent_claim_gr=claim_oth_1prnt if pernum==parent_grnd_s & claim_2prnt==. & claim_1prnt==. & claim_oth_2prnt==. & claim_oth_1prnt!=.

// Spread the claimant found on the parent record over the family
bysort uniqfm: egen claimer_grand=max(findparent_claim_gr)

// Use the full variable name (the original relied on the abbreviation grandchild,
// which fails when variable abbreviation is off or the abbreviation is ambiguous)
gen claim_grnd=claimer_grand if grandchildren==1 


// Missing claimant codes become 0 so that the codes can be summed below
replace claim_nodep=0 if claim_nodep==.
replace claim_2prnt=0 if claim_2prnt==.
replace claim_1prnt=0 if claim_1prnt==.
replace claim_oth_2prnt=0 if claim_oth_2prnt==.
replace claim_oth_1prnt=0 if claim_oth_1prnt==.
replace claim_grnd=0 if claim_grnd==.

// After finding the person number of whoever claims each dependent, build the code that defines the new (single-filer) family id.
// filers is the own person number of heads/spouses in families with dependents; new_fm_sngl is the sum of all claimant codes.
gen filers=0
replace filers=pernum if (hd==1 | sp==1) & claim_nodep==0
gen new_fm_sngl=claim_nodep+claim_2prnt+claim_1prnt+claim_oth_2prnt+claim_oth_1prnt+claim_grnd+filers

// New filing-unit id combining year, serial, familyid and the claimant code (stored as double to keep all digits)
gen double new_uniqfm=(year*10000000000)+(serial*100000)+(familyid*100)+new_fm_sngl






// Total dependents per new filing unit: adults who live there and might be on some type of
// disability, all children, and children of children that are dependents
// (all other children that are not dependents have been dropped already)
bysort new_uniqfm: egen depx=total(dep)


// Number of dependents is capped in the tax code and in TAXSIM //
replace depx=15 if depx>15 
label var depx "[TS7] Number of dependents"

// Dependent children for dependent care (dep13), child tax credit (dep17) and EITC (dep18) //
gen dependent13=0
replace dependent13=1 if (child==1 | grandchildren==1) & age<13
bysort new_uniqfm: egen dep13=total(dependent13)
replace dep13 = 15 if dep13 >15
label var dep13 "[TS8] Number of child care dependents"

gen dependent17=0
replace dependent17=1 if (child==1 | grandchildren==1) & age<17
bysort new_uniqfm: egen dep17=total(dependent17)
replace dep17 = 15 if dep17>15
label var dep17 "[TS9] Number of child tax credit dependents"


// EITC dependents use the age<24 indicator built here.
// Fix: the original totalled dependent17 (copy-paste slip), so dep18 duplicated dep17 and dependent18 was never used.
gen dependent18=0
replace dependent18=1 if (child==1 | grandchildren==1) & age<24
bysort new_uniqfm: egen dep18=total(dependent18)
replace dep18 = 15 if dep18>15
label var dep18 "[TS10] Number of EITC dependents"



// TAXSIM filing-status input mstat: 1 for every head and spouse, 0 for everyone else.
// NOTE(review): presumably code 1 is the single-filer status in TAXSIM - confirm.
gen mstat = (hd==1 | sp==1)
label variable mstat "[TS4] Tax filing marital statu"



 
 // Depending on the version of the ASEC used some of the income terms change names in the 1980s and will need to be modified */

/*Income: wage & salary, nonfarm self-employ, farm/nonincorporated SE. 
taxsim will not accept negative wages - put negatives in otherprop */


gen wages_i=  incwage + incbus + incfarm if incbus>=0 & incfarm>=0  
replace wages_i= incwage if incbus<0 | incfarm<0


/* private disability retirement income is considered earned income if under minimum retirement age, assumed 56 */
g prdisab1=0
replace prdisab1=1 if (srcdisa1 >=2 & srcdisa1 <=5)

g prdisab2=0
replace prdisab2=1 if (srcdisa2 >=2 & srcdisa2 <=5)

replace wages_i=wages_i + incdisa1 if age < 56 & prdisab1==1
replace wages_i=wages_i + incdisa2 if age < 56 & prdisab2==2
label var wages_i "Individual wages"

// ---- Individual-level income components (one variable per TAXSIM category) ----

// Dividends
generate dividends_i = incdivid
label variable dividends_i "Individual dividends"

// Interest
generate intrec_i = incint
label variable intrec_i "Individuals interest received"

// Other property income: rent, plus business and farm income whenever either of them is negative
generate otherprop_i = cond(incbus<0 | incfarm<0, incrent + incbus + incfarm, incrent)
label variable otherprop_i "Individual otherprop"

// Non-property income (alimony)
generate nonprop_i = incalim
label variable nonprop_i "Individual nonprop"

// Private retirement funds
generate pensions_i = incretir
label variable pensions_i "Individual pensions"

// Social security, survivors and disability.
// Under age 56 only the disability sources not flagged as private (prdisab==0) are added;
// above age 55 total disability income is included.
generate gssi_i = incss + incsurv if age < 56
replace gssi_i = gssi_i + incdisa1 if age < 56 & prdisab1 == 0
replace gssi_i = gssi_i + incdisa2 if age < 56 & prdisab2 == 0
replace gssi_i = incss + incsurv + incdisab if age > 55
label variable gssi_i "Individual gross social security"

// Transfers: public assistance/welfare, workers comp, veterans payments, child support, supplemental security income
generate transfers_i = incwelfr + incwkcom + incvet + incchild + incssi
label variable transfers_i "Individual transfer income"

// Unemployment compensation
generate ui_i = incunemp
label variable ui_i "Individual UI income"



/*Ignored sources of income
         inced      Income: educational assistance
         incoth     Income: other sources
NOTE: cpgain and cploss are imputed by CPS - attached to "tax filing head", but they stop in 2008
      prptax imputation is also available and attached to every member of a household
      prptax WILL BE PICKED UP WHEN DATA ARE AGGREGATED TO TAX FILING UNIT
*/

// Self-employment indicators: any non-zero amount counts, positive or negative
gen selfemp = (oincbus != 0)
label variable selfemp "Person had any self-emp income (pos or neg)" 

gen selfemp2 = !(oincbus == 0 & oincfarm == 0)
label variable selfemp2 "Person had any self-emp or farm income (pos or neg)" 


// State code for TAXSIM. The source variable name for the state depends on the ASEC version used.
// The codes in statefip are renumbered consecutively 1..51 in ascending order;
// any other value (including missing) leaves state at 0.
gen cpsstate = statefip
drop statefip
gen state = 0
local taxsim_code = 0
foreach fips of numlist 1 2 4/6 8/13 15/42 44/51 53/56 {
    local ++taxsim_code
    replace state = `taxsim_code' if cpsstate == `fips'
}

label variable state "[TS3] State ID"


// TAXSIM takes the age of the primary (page) and secondary (sage) taxpayer instead of a single age.
// Every unit is treated as having no spouse, so sage is 0.
generate age_hd = age if hd == 1 | sp == 1
by new_uniqfm, sort: egen mn_age_head = max(age_hd)
generate page = mn_age_head

generate sage = 0

label variable page "[TS5] Age of head "
label variable sage "[TS6] Age of spouse "


// Wages per new filing unit: pwages sums the wages of heads/spouses in the unit,
// swages is 0, and dependents' wages are totalled separately in othwages.
// Dependents' wages are NOT added to pwages: in this analysis children file on their own
// (adding othwages to pwages would assume that children do not file).
generate hwages = wages_i if hd == 1 | sp == 1
by new_uniqfm, sort: egen pwages = total(hwages)
generate swages = 0
generate othwages_i = wages_i if dep == 1
by new_uniqfm, sort: egen othwages = total(othwages_i)

label variable pwages "[TS11] Primary taxpayer wage income"
label variable swages "[TS12] Secondary taxpayer wage income"

/*Note: In a joint filing tax unit, there are two records, one for each as a primary taxpayer */

// ---------------------------------------------------------------
// Remaining TAXSIM income inputs.
// Each source is taken from the individual-level variable; only
// transfers are summed over the new filing unit.
// ---------------------------------------------------------------
foreach src in dividends intrec otherprop nonprop pensions gssi {
    generate `src' = `src'_i
}
by new_uniqfm, sort: egen transfers = total(transfers_i)
generate ui = ui_i

label variable dividends "[TS13] Dividend income"
label variable intrec "[TS14] Interest income"
label variable otherprop "[TS17] Rental income"
label variable nonprop "[TS18] Alimony income"
label variable pensions "[TS19] Pensions/retirement income"
label variable gssi "[TS20] Social sec, supp sec, survivors, disability bens"
label variable transfers "[TS22] Welfare, WC, Vets, child supp"
label variable ui "[TS21] Unemployment compensation"

// Items that are not available here are passed to TAXSIM as zero
// (care_val is available starting with the 2010 tax year but is not used).
foreach item in rentpaid otheritem childcare mortgage {
    generate `item' = 0
}
label variable rentpaid "[TS23] Rent paid - NOT AVAIL FOR CPS"
label variable otheritem "[TS25] Oth itemized deductions - NOT AVAIL FOR CPS"
label variable childcare "[TS26] Child care expenses - AVAIL FOR CPS starting 2011 survey"
label variable mortgage "[TS27] Mortgage interest paid - NOT AVAIL FOR CPS"

// Property taxes paid (CPS imputation).
// The imputed amount is attached to every person in the HOUSEHOLD, so it is first
// divided by the number of families and then assigned within each couple:
// the highest earner claims it, or it is halved when both partners have the same income.
generate prptax = proptax/families
drop proptax
generate proptax = 0
replace proptax = cond(dupinc == 0, prptax, prptax/2) if max_earncpl == inctot & couples == 1 & inlist(dupinc, 0, 1)

label variable proptax "[TS24] Property tax paid (imputed by CPS)"

// Capital gains are not collected in the ASEC (CPS imputes them only up to 2008); both are set to zero
generate stcg = 0
label variable stcg "[TS15] Short-term cap gains - NOT AVAIL FOR CPS"

generate ltcg = 0
label variable ltcg "[TS16] Long-term cap gains - NOT AVAIL FOR CPS"

// TAXSIM will not take negative values: floor every input at zero
foreach input in pwages swages otherprop nonprop intrec childcare dividends pensions gssi transfers ui proptax depx dep18 dep17 dep13 {
    replace `input' = 0 if `input' < 0
}

// Missing values of these two inputs are also set to zero
replace swages = 0 if swages == .
replace proptax = 0 if proptax == .

/* ****RUN TAXSIM year by year and then reassemble **** */

// Show the number of records, then keep only heads and spouses (each is one filing record)
count
keep if (hd==1 | sp==1)
// Keep the TAXSIM inputs plus the identifiers and demographics needed afterwards
keep year state mstat page sage depx dep13 dep17 dep18 pwages swages dividends intrec stcg ltcg otherprop nonprop pensions gssi ui transfers rentpaid proptax otheritem childcare mortgage race1 race2 race3 marst uniqfm new_uniqfm cohab couples dupinc hd sp hisp

// NOTE(review): taxsim35 is a user-written command; the lines below rely on it adding fiitax, siitax and v27 to the data in memory - confirm the option semantics
taxsim35, full replace

// Sum the liabilities of both partners within the original family
bysort uniqfm: egen un_Fliability=total(fiitax)
bysort uniqfm: egen un_Sliability=total(siitax)
// NOTE(review): v27 is presumably the TAXSIM AMT output - confirm against the TAXSIM documentation
bysort uniqfm: egen un_AMTliab=total(v27)
gen un_Tliability=un_Fliability+un_Sliability // Total Liability //

// One record per family (the head) with the family-level totals
keep if hd==1
keep year state race1 race2 race3 marst uniqfm new_uniqfm cohab couples dupinc hd sp un_Fliability un_Sliability un_Tliability un_AMTliab 

// NOTE(review): the tempfile macro created here is not referenced again in the visible code; the yearly file is saved to a permanent path instead
tempfile dump`j'
// The blank after the last backslash keeps the backslash from escaping the macro; the file name therefore starts with a blank
save "dta\Unmarried_Files\ dump`j'", replace

// Bring back the full multi-year data set for the next pass of the loop
restore
}

// Reassemble the yearly results: start from the first year and append the remaining years.
// The first year comes from startyear instead of a hard-coded 1991/1992, so changing
// startyear or endyear at the top of the file is enough to change the sample period.
use "dta\Unmarried_Files\ dump`startyear'.dta",clear

local newstart = `startyear' + 1
forvalues i=`newstart'/`endyear' {
quietly append using "dta\Unmarried_Files\ dump`i'"
}

save "dta\Unmarried_Files\unmarried_sample.dta", replace
// No log is opened in the visible part of this file, and a bare log close stops with an
// error when no log is open; capture makes the close a no-op in that case
capture log close
